*Process SAT data

global raw1="$raw/SAT/"

***********************************
** SAT SCORES **
***********************************
	
foreach y of numlist 2016/2023 {
import excel "${raw1}/sat_`y'_student_level_data.xlsx", clear firstrow

rename *, lower

* Format
rename sat_read verbalsc
rename sat_math mathsc
*rename sat_write writesc /* no writing score for this year - SAT changed */

* Drop those who didn't take the SAT, just the SAT subject tests (SAT2)
drop if verbalsc == . & mathsc == .

gen year = `y'

*Code scores

rename verbalsc satvrecn
rename mathsc satmrecn 
gen sattot=satvrecn+satmrecn


*Center scores by year
foreach x of varlist satvrecn satmrecn  sattot {
	center `x' if `x'!=., standardize
}

rename sd_sasid sasid

keep year sasid satvrecn satmrecn sattot c_*
rename year yearsat
* Don't need to use the cross-walk b/c you have the sasids 
save "${saves}/sat`y'_cleaned.dta", replace

} 


foreach y of numlist 2007/2015 {
import excel "${raw1}/sat_`y'_student_level_data.xlsx", clear firstrow

rename *, lower

* Format
rename sat_read verbalsc
rename sat_math mathsc
rename sat_write writesc /* no writing score for this year - SAT changed */

* Drop those who didn't take the SAT, just the SAT subject tests (SAT2)
drop if verbalsc == . & mathsc == .

gen year = `y'

*Code scores

rename verbalsc satvrecn
rename mathsc satmrecn 
rename writesc satw
gen sattot=satvrecn+satmrecn+satw 


*Center scores by year
foreach x of varlist satvrecn satmrecn satw sattot {
	center `x' if `x'!=., standardize
}

rename sd_sasid sasid

keep year sasid satvrecn satmrecn satw sattot c_*
rename year yearsat
* Don't need to use the cross-walk b/c you have the sasids 
save "${saves}/sat`y'_cleaned.dta", replace

} 

		
		**********Combine and save**************
		clear
		foreach n of numlist 2007/2023 {
		
			append using "${saves}/sat`n'_cleaned.dta"
		
		}
			*these ones need SASIDS
		save "${saves}/sat_combined.dta", replace
			
***********************************
** GENERATE NEW VARIABLES AS THOSE
** IN THE SAT_WITH_SASIDS FILE 
***********************************

		use "${saves}/sat_combined.dta", clear
			  
			duplicates tag sasid, gen(tag)
			tab tag
			/* 2.7% had taken "multiple" times */
			
			sort sasid year
			duplicates drop sasid satvrecn satmrecn satw, force 
			drop if sasid ==.
			drop tag
			duplicates tag sasid, gen(tag)
			tab tag
			
			*KEEP ONLY THE LAST RECORD FOR THESE MULTIPLE-TEST TAKERS 
			*br if tag > 0
			set more off
			foreach var in satvrecn satmrecn satw {
				bys sasid: egen max_`var' = max(`var')
				 
			}
			gen max_sattot = max_satvrecn + max_satmrecn + max_satw 
			
			foreach var in satvrecn satmrecn satw sattot {
				bys sasid: replace `var' = max_`var'
			}
			drop max_sat*
			
			gsort sasid -year
			by sasid: keep if (_n==1 & tag > 0)|tag==0
			drop tag 
			
			*CHECK DUPLICATES
			duplicates tag sasid, gen(tag)
			count if tag~=0
			
			*KEEP ONLY RELEVANT RESULTS
			keep sasid year satvrecn satmrecn satw sattot 
			sort sasid			  
					 
		
save "$data_clean/sat.dta", replace
		
